






* Load the cleaned sequential records, replacing whatever is currently in memory.
use working/cleaned_valid_sequential_records, clear

sort record_order

* Re-format string variables to make them a bit easier to read in columns.
* Each variable is listed once, with the display format it ends up with
* (when a variable is formatted twice, the later format is the one that sticks).
format %10s pr_name_surn
format %15s pr_age pr_name_titles_terms
format %25s relation_to_acct_holder relation_to_acct_holder_std complexion
format %35s occupation pr_name pr_name_gn works_for residence
format %50s birth_place where_brought_up remarks regiment_company plantation

* Put the key variables first in the dataset.
order city date new_account organization pr_name relation_to_acct_holder_std ///
	residence age_in_years complexion occupation where_brought_up birth_place

* where_brought_up and birth_place are merge keys further down, so recast them
* from strL to str#. Check first that no value is longer than 181.
assert length(where_brought_up) <= 181
assert length(birth_place) <= 181
recast str where_brought_up birth_place

*********************
* Complexion filter *
*********************

* Categorize Complexion
*
* One 0/1 indicator per category, built by pattern-matching the free-text
* complexion field. Every indicator except listed_complexion is missing when
* complexion is missing. The categories are not mutually exclusive.

* Textual shorthand for the lower-cased field used by the case-insensitive rules.
local cx lower(complexion)

generate white   = regexm(`cx', "^white|^wht|^wh\.|^w\.|^wh$|ginger") & !regexm(`cx', "near|almost") if !missing(complexion)
* black, brown and dark match the field as written (case-sensitive patterns).
generate black   = regexm(complexion, "Bl[ack]|Bc|blk|[Bb]lack|B[k\.]|^Bl?\.?$|B'k") if !missing(complexion)
generate brown   = regexm(complexion, "[Bb]r[^iu]|[Mm]ed|[Oo]live|^Br$") if !missing(complexion)
generate dark    = regexm(complexion, "[Dd]ar|[Dd][kr]") if !missing(complexion)
generate mixed   = regexm(`cx', "mul|gri|gr?\.|mix") if !missing(complexion)
generate colored = regexm(`cx', "colou*r") if !missing(complexion)
generate yellow  = regexm(`cx', "yel|^y\.?$| y\.|^y\.") if !missing(complexion)
generate cold    = regexm(`cx', "cold") if !missing(complexion)
generate light   = regexm(`cx', "ligh|lgt|lt|almost|near|bright|about|[a-z\.] white|fair|lite|^l\.?$|^l\.? ") if !missing(complexion)

* other_likely_black flags records that list a complexion matching none of the
* nine categories above (the row maximum of the indicators is 0).
egen _any_cx = rowmax(white black brown dark mixed colored yellow cold light)
generate other_likely_black = _any_cx == 0 if !missing(complexion)
drop _any_cx

* 1 when a complexion was recorded at all, 0 otherwise (never missing).
generate listed_complexion = !missing(complexion)


**********************
* Job Categorization *
**********************

* Assign a coarse job category by pattern-matching the free-text occupation field.
* The rules run top to bottom and each one overwrites earlier assignments, so when
* an occupation matches several patterns the LAST matching rule wins (for example
* "barber" matches "bar" under service but ends up as trades). Do not reorder.

* Textual shorthand for the lower-cased field used by the case-insensitive rules.
local occ lower(occupation)

* Start from an empty job variable whether or not one already exists.
capture gen job = ""
replace job = ""

replace job = "service"        if ustrregexm(`occ', "restaur|grocer|cook|bar|^wait|porter|steward|service|servt")
replace job = "student"        if ustrregexm(`occ', "student|university|school|pupil|sch\.")
replace job = "educator"       if ustrregexm(`occ', "teach|superintendent")
replace job = "construction"   if ustrregexm(`occ', "brick|wood|plasterer|mason|trim|roofer")
* Case-sensitive on purpose: only a capitalized "Hod" counts.
replace job = "construction"   if ustrregexm(occupation, "Hod")
replace job = "transportation" if ustrregexm(`occ', "cart|hackman|coach|wagon|stevedore|teamster|steam|boat|pilot")
replace job = "agriculture"    if ustrregexm(`occ', "cotton|farm|stable|plant[^a]")
* "nurse" is also in the trades pattern below, so those rows end up as trades.
replace job = "domestic"       if ustrregexm(`occ', "poter|sew|dress|butler|domestic|laund|seam|iron|nurse|maid|(^| )house")
* "wash..." counts as domestic unless the text also mentions "white" (e.g. whitewashing).
replace job = "domestic"       if ustrregexm(`occ', "(^| )wash") & !ustrregexm(`occ', "white")
replace job = "sales"          if ustrregexm(`occ', "(sells? |sales)")
replace job = "trades"         if ustrregexm(`occ', "fireman|fish(er|ing)|policeman|nurse|nursing|tailor|mechanic|machinist|butcher|carpenter|blacksmith|shoe|barber|apprentice|cooper|host?ler|build[ies]|paint|musician|baker|cutter|oyster|gardner|garden|confect|maker|wright")
replace job = "unskilled"      if ustrregexm(`occ', "canal|sailor|sailing|watchman|boot black|boy|labour|labor|soldier|huckster|seaman|mill|janitor|jobbing|laborer|servant|hand|driv|dray|anything|messenger")
* Case-sensitive: capitalized "Lab" anywhere, or an occupation that is exactly "Works".
replace job = "unskilled"      if ustrregexm(occupation, "Lab|^Works$")
replace job = "skilled"        if ustrregexm(`occ', "merchant|engineer|physician|dentist|accountant|architect|administrator|lawyer|attorney|book|clerk|druggist|doctor|printer|scholar|treasurer|banker")
* NOTE(review): "^min" also matches occupations that merely start with "min"
* (e.g. "miner") -- confirm that this is intended.
replace job = "religious"      if ustrregexm(`occ', "rev|church|^min|minister|clergy|preach")
replace job = "factory"        if ustrregexm(`occ', "(^| )fac|tob\.|t\. f\.|tredegar")
replace job = "unemployed"     if ustrregexm(`occ', "at home")

* Of the occupations still uncategorized, a final "-er" suffix seems to capture
* about 95% tradesmen, so treat those as trades. Matches the field as written.
replace job = "trades" if ustrregexm(occupation, "er$") & missing(job)

* Collapse the job categories into a Low / Medium / High education level.
* Categories not listed below (e.g. "sales") and uncategorized rows stay "".
capture gen job_education = ""
replace job_education = ""
replace job_education = "Low"    if inlist(job, "unemployed", "unskilled", "student", "service", "domestic")
replace job_education = "Medium" if inlist(job, "trades", "construction", "religious", "transportation", "agriculture", "factory")
replace job_education = "High"   if inlist(job, "skilled", "educator")


save working/demographics, replace

**********************
* Geocoded Locations *
**********************


* reassign_state
*
* Overwrites person_state with a lower-case state name for every observation
* whose _temp string contains one of the abbreviations below.
*
* Works on the dataset in memory and takes no arguments. The caller must have
* created two string variables beforehand:
*   person_state  - the value to be corrected (left alone when nothing matches)
*   _temp         - the text to search (callers pass the place text with spaces removed)
*
* Matching is case-sensitive and unanchored. The rules run in order and each one
* overwrites earlier results, so if several abbreviations occur in the same text
* the LAST matching rule below decides. Each (state, abbreviation) pair is listed
* once, at the position where it takes effect; keep the order when editing.
* Note that "D\.C\." is mapped to the value "washington".
capture program drop reassign_state
program reassign_state
	replace person_state = "georgia"        if regexm(_temp, "Ga\.")
	replace person_state = "maryland"       if regexm(_temp, "Md\.")
	replace person_state = "virginia"       if regexm(_temp, "Va\.")
	replace person_state = "tennessee"      if regexm(_temp, "Tenn\.")
	replace person_state = "mississippi"    if regexm(_temp, "Miss\.")
	replace person_state = "massachusetts"  if regexm(_temp, "Mass\.")
	replace person_state = "alabama"        if regexm(_temp, "Ala\.")
	replace person_state = "pennsylvania"   if regexm(_temp, "Pa\.")
	replace person_state = "washington"     if regexm(_temp, "D\.C\.")
	replace person_state = "kentucky"       if regexm(_temp, "Ky\.")
	replace person_state = "georgia"        if regexm(_temp, "Savh\.")
	replace person_state = "connecticut"    if regexm(_temp, "Ct\.")
	replace person_state = "new york"       if regexm(_temp, "N\.Y\.")
	replace person_state = "georgia"        if regexm(_temp, "Geo\.")
	replace person_state = "north carolina" if regexm(_temp, "N\.C\.")
	replace person_state = "louisiana"      if regexm(_temp, "N\.O\.")
	replace person_state = "missouri"       if regexm(_temp, "Mo\.")
	replace person_state = "georgia"        if regexm(_temp, "Co\.Ga\.")
	replace person_state = "louisiana"      if regexm(_temp, "La\.")
	replace person_state = "virginia"       if regexm(_temp, "Co\.Va\.")
	replace person_state = "new jersey"     if regexm(_temp, "N\.J\.")
	replace person_state = "kentucky"       if regexm(_temp, "Kty\.")
	replace person_state = "arkansas"       if regexm(_temp, "Arks\.")
	replace person_state = "virginia"       if regexm(_temp, "Vir\.")
	replace person_state = "south carolina" if regexm(_temp, "S\.C\.")
	replace person_state = "pennsylvania"   if regexm(_temp, "Penn\.")
	replace person_state = "alabama"        if regexm(_temp, "Alab\.")
	replace person_state = "maryland"       if regexm(_temp, "Co\.Md\.")
	replace person_state = "tennessee"      if regexm(_temp, "Co\.Tenn\.")
	replace person_state = "north carolina" if regexm(_temp, "N\.Car\.")
	replace person_state = "new hampshire"  if regexm(_temp, "N\.H\.")
	replace person_state = "virginia"       if regexm(_temp, "Richd\.")
	replace person_state = "wisconsin"      if regexm(_temp, "Wis\.")
	replace person_state = "virginia"       if regexm(_temp, "Via\.")
	replace person_state = "alabama"        if regexm(_temp, "Co\.Ala\.")
	replace person_state = "tennessee"      if regexm(_temp, "Ten\.")
	replace person_state = "florida"        if regexm(_temp, "Flo\.")
	replace person_state = "ohio"           if regexm(_temp, "Ohio\.")
	replace person_state = "kentucky"       if regexm(_temp, "Ken\.")
	replace person_state = "illinois"       if regexm(_temp, "Ills\.")
	replace person_state = "florida"        if regexm(_temp, "Fla\.")
	replace person_state = "delaware"       if regexm(_temp, "Del\.")
	replace person_state = "new york"       if regexm(_temp, "Ny\.")
	replace person_state = "pennsylvania"   if regexm(_temp, "Penna\.")
	replace person_state = "maryland"       if regexm(_temp, "M\.D\.")
	replace person_state = "michigan"       if regexm(_temp, "Mich\.")
	replace person_state = "north carolina" if regexm(_temp, "N\.Ca\.")
	replace person_state = "south carolina" if regexm(_temp, "So\.Car\.")
	replace person_state = "pennsylvania"   if regexm(_temp, "Phila\.")
	replace person_state = "texas"          if regexm(_temp, "Tex\.")
	replace person_state = "arkansas"       if regexm(_temp, "Co\.Ark\.")
	replace person_state = "mississippi"    if regexm(_temp, "Mis\.")
	replace person_state = "florida"        if regexm(_temp, "Fl\.")
	replace person_state = "connecticut"    if regexm(_temp, "Conn\.")
	replace person_state = "south carolina" if regexm(_temp, "S\.Ca\.")
	replace person_state = "alabama"        if regexm(_temp, "Al\.")
	replace person_state = "tennessee"      if regexm(_temp, "Mem\.")
	replace person_state = "south carolina" if regexm(_temp, "So\.Ca\.")
	replace person_state = "arkansas"       if regexm(_temp, "Ark\.")
	replace person_state = "south carolina" if regexm(_temp, "S\.Car\.")
	replace person_state = "virginia"       if regexm(_temp, "VA\.")
	replace person_state = "louisiana"      if regexm(_temp, "Shrpt\.")
	replace person_state = "mississippi"    if regexm(_temp, "Missippi\.")
end

* Reload the demographics file and attach the geocoded fields (locality,
* administrative*, country) twice: once for where_brought_up (prefix wbu_) and
* once for birth_place (prefix bp_). Unmatched master records are kept; records
* that exist only in the geocoded file are not added.
use working/demographics, clear

local prefix_where_brought_up wbu
local prefix_birth_place      bp

foreach place in where_brought_up birth_place {
	local pfx `prefix_`place''

	* geocoded_locations is merged on fs_location, so the place variable
	* borrows that name for the duration of the merge.
	rename `place' fs_location
	merge m:1 fs_location using data/geocoded_locations, keep(match master) nogen keepusing(locality administrative* country)

	* Prefix the freshly merged fields so the next pass can bring in its own copies.
	foreach var of varlist locality administrative* country {
		rename `var' `pfx'_`var'
	}

	rename fs_location `place'
}

* Re-format string variables to make them a bit easier to read in columns.
* Each variable is listed once, with the display format it ends up with
* (when a variable is formatted twice, the later format is the one that sticks).
sort record_order
format %10s pr_name_surn
format %15s pr_age pr_name_titles_terms
format %25s relation_to_acct_holder relation_to_acct_holder_std complexion
format %35s occupation pr_name pr_name_gn works_for residence
format %50s birth_place where_brought_up remarks regiment_company plantation


** Hack to check the state
*
* For each place field, start from the geocoder's administrative_area_level_1
* (lower-cased) and let reassign_state override it wherever the place text itself
* carries a recognizable state abbreviation. Results: wbu_state and bp_state.
* Where the override changed the state, the country is set to "United States".
*
* Both passes now share one loop body. Previously the birth-place pass seeded
* person_state WITHOUT lower(), so every row that reassign_state did not touch
* kept the geocoder's capitalization (e.g. "Virginia"), compared unequal to
* lower(bp_administrative_area_level_1), and had bp_country overwritten with
* "United States" -- including records geocoded outside the United States.
local place_wbu where_brought_up
local place_bp  birth_place

foreach p in wbu bp {
	* reassign_state reads _temp and rewrites person_state in place.
	gen person_state = lower(`p'_administrative_area_level_1)
	gen _temp = subinstr(`place_`p''," ","",.)
	reassign_state
	rename person_state `p'_state

	* Only rows whose state was changed by reassign_state differ here.
	replace `p'_country = "United States" if `p'_state != lower(`p'_administrative_area_level_1)
	drop _temp
}

* Attach branch-level data by city. assert() stops the run if any record has no
* branch in data/branch_data; keep(match) drops branches that have no records.
merge m:1 city using data/branch_data, assert(match using) keep(match) nogen

* For each place field (wbu = where brought up, bp = birth place):
*   <flag>        1 if the person's state equals the branch's statefull (case-insensitive),
*                 0 if it differs, missing when the person's state is unknown
*   <flag>_valid  1 when the person's state is known, 0 otherwise
local flag_wbu brought_up_in_state
local flag_bp  born_in_state

foreach p in wbu bp {
	replace `p'_state = lower(`p'_state)

	generate `flag_`p''       = lower(`p'_state) == lower(statefull) if !missing(`p'_state)
	generate `flag_`p''_valid = !missing(`p'_state)
}

* Per-branch exceptions: also count a person as "in state" when their state is one
* of the additional states listed for the branch city (presumably states adjacent
* to that branch -- confirm). The same list is applied to both flags:
*   brought_up_in_state  using wbu_state
*   born_in_state        using bp_state
local flag_vars  brought_up_in_state born_in_state
local state_vars wbu_state bp_state

forvalues i = 1/2 {
	local flag  : word `i' of `flag_vars'
	local state : word `i' of `state_vars'

	replace `flag' = 1 if city=="Augusta" & inlist(`state',"south carolina")
	replace `flag' = 1 if city=="Baltimore" & inlist(`state',"pennsylvania" )
	replace `flag' = 1 if city=="Beaufort" & inlist(`state',"georgia")
	replace `flag' = 1 if city=="Columbus" & inlist(`state',"alabama")
	replace `flag' = 1 if city=="Huntsville" & inlist(`state',"tennessee")
	replace `flag' = 1 if city=="Louisville" & inlist(`state',"indiana")
	replace `flag' = 1 if city=="Memphis" & inlist(`state',"arkansas", "mississippi")
	replace `flag' = 1 if city=="Mobile" & inlist(`state',"mississippi")
	* NOTE(review): "mississipi" is misspelled, so this rule can never match a
	* state produced by reassign_state (which writes "mississippi"). Left as-is
	* because the intended state is unclear -- possibly "louisiana"; confirm.
	replace `flag' = 1 if city=="Natchez" & inlist(`state',"mississipi")
	replace `flag' = 1 if city=="New Orleans" & inlist(`state',"mississippi")
	replace `flag' = 1 if city=="Shreveport" & inlist(`state',"texas")
	replace `flag' = 1 if city=="St. Louis" & inlist(`state',"illinois")
	replace `flag' = 1 if city=="Vicksburg" & inlist(`state',"louisiana")
	replace `flag' = 1 if city=="Washington" & inlist(`state',"maryland", "pennsylvania", "virginia" )
}




* Fixed numeric coding for the bank branches (1-33, alphabetical by city).
label define bank_branch ///
	 1 "Alexandria"    2 "Atlanta"       3 "Augusta"       4 "Baltimore"    ///
	 5 "Beaufort"      6 "Charleston"    7 "Columbia"      8 "Columbus"     ///
	 9 "Huntsville"   10 "Jacksonville" 11 "Lexington"    12 "Little Rock"  ///
	13 "Louisville"   14 "Lynchburg"    15 "Macon"        16 "Memphis"      ///
	17 "Mobile"       18 "Nashville"    19 "Natchez"      20 "New Bern"     ///
	21 "New Orleans"  22 "New York"     23 "Norfolk"      24 "Philadelphia" ///
	25 "Raleigh"      26 "Richmond"     27 "Savannah"     28 "Shreveport"   ///
	29 "St. Louis"    30 "Tallahassee"  31 "Vicksburg"    32 "Washington"   ///
	33 "Wilmington", replace

* Encode city into the numeric variable bank using that coding. noextend makes
* encode stop with an error if city holds a value that is not in the label,
* instead of silently adding a new code.
encode city, generate(bank) label(bank_branch) noextend
label variable bank "Bank branch"


* Account-timing flags.
*   pre_panic_acct   new accounts dated 1 Jan 1873 - 17 Sep 1873 (missing unless new_account==1)
*   post_panic_acct  new accounts dated 18 Sep 1873 - 2 Jul 1874 (missing unless new_account==1)
*   post_window      any record dated on or after 1 Jan 1873
local window_start = mdy(1, 1, 1873)

generate pre_panic_acct  = inrange(date, `window_start', mdy(9,17,1873)) if new_account == 1
generate post_panic_acct = inrange(date, mdy(9,18,1873), mdy(7,2,1874)) if new_account == 1
generate post_window     = date >= `window_start'

generate year = year(date)

* Drop accounts from after bank close (1 Jul 1874). Because a missing date
* compares greater than any date in Stata, records with no date are dropped too.
* NOTE(review): post_panic_acct's window runs to 2 Jul 1874, one day past this
* cutoff, so records dated 2 Jul are flagged and then dropped -- confirm the bound.
drop if date > mdy(7,1,1874)

* Save the categorized records, then continue from the saved file.
save working/categorized_valid_sequential_records, replace
use working/categorized_valid_sequential_records, clear

* Bring in the tagged geography variables for residence, where-brought-up and
* birth place, one record per fs_pr_ark. Unmatched master records are kept.
foreach tag in residence wbu bp {
	merge 1:1 fs_pr_ark using data/`tag'_tagged, nogen keep(match master) keepusing(`tag'_*)
}

* Numeric versions of the tag fields (real() yields missing for text that is
* not a number).
generate bis  = real(bp_in_state)
generate buis = real(wbu_in_state)

generate bic  = real(bp_in_county)
generate buic = real(wbu_in_county)

* local: 1 if born OR brought up in the branch's state, 0 otherwise;
* missing only when both in-state tags are missing.
generate local = (bis == 1 | buis == 1) if !(missing(bis) & missing(buis))

* Occupation dummies, missing when no job category was assigned.
generate unskilled = inlist(job, "unskilled", "domestic") if !missing(job)
generate farmer    = job == "agriculture" if !missing(job)

label variable organization        "Organization"
label variable white               "White"
label variable unskilled           "Unskilled"
label variable farmer              "Farmer"
label variable brought_up_in_state "Raised in State"
label variable born_in_state       "Born in State"
label variable wbu_in_county       "Raised in County"
label variable wbu_in_state        "Raised in State"
label variable bp_in_county        "Born in County"
label variable bp_in_state         "Born in State"



* Generate Nonlocal accounts
* nonlocal = 1 when every available in-state tag is 0: both are 0, or one is 0
* and the other is missing. It is 0 (never missing) in all other cases.
generate nonlocal = (bis == 0 & buis == 0) | (bis == 0 & missing(buis)) | (buis == 0 & missing(bis))

* New-account flags. Only the local flag excludes records coded white
* (white != 1 is also true when white is missing).
generate new_local_account    = new_account == 1 & local == 1 & white != 1
generate new_nonlocal_account = new_account == 1 & nonlocal == 1
generate new_white_account    = new_account == 1 & white == 1

save working/sequential_records_with_geography, replace



